﻿using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml.Linq;

namespace CompanyDic.SearchEngine
{
    /// <summary>
    /// Helpers for downloading a page and extracting company links from its HTML.
    /// </summary>
    public class WebHelper
    {
        // Matches the shortest span from "<a" up to the next "</a>"; [\d\D] lets a match cross line breaks.
        // Built once and reused, since the pattern never changes between calls.
        private static readonly Regex AnchorRegex = new Regex("<a[\\d\\D]+?</a>");

        // Only anchors whose markup contains this text are treated as company links.
        private const string CompanyLinkMarker = "companyLink";

        /// <summary>
        /// Requests <paramref name="uri"/> and returns the whole response body as a string.
        /// </summary>
        /// <param name="uri">The address to request; passed as-is to <see cref="WebRequest.Create(string)"/>.</param>
        /// <returns>The response body, decoded with the default <see cref="StreamReader"/> encoding (UTF-8).</returns>
        /// <remarks>
        /// Exceptions thrown while creating the request, obtaining the response, or reading the
        /// stream are not caught here and propagate to the caller.
        /// </remarks>
        public static string GetHtml(string uri)
        {
            WebRequest request = WebRequest.Create(uri);

            // The response owns the underlying connection, so it is disposed together with
            // the stream and the reader even when reading fails part-way.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Scans <paramref name="html"/> for anchor elements marked with "companyLink" and
        /// builds a <see cref="CompanySummary"/> from each one that has both an href and text.
        /// </summary>
        /// <param name="html">The markup to scan.</param>
        /// <param name="city">Value copied into the <c>City</c> property of every returned summary.</param>
        /// <returns>
        /// The summaries found, in document order. Anchors that are not well-formed XML,
        /// have no href attribute, or have an empty href or empty text are skipped.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
        public static List<CompanySummary> HtmlAnalyzer(string html, string city)
        {
            if (html == null)
            {
                throw new ArgumentNullException("html");
            }

            List<CompanySummary> companySummaries = new List<CompanySummary>();

            foreach (Match m in AnchorRegex.Matches(html))
            {
                // Ordinal search: the marker is a fixed identifier, not linguistic text.
                if (m.Value.IndexOf(CompanyLinkMarker, StringComparison.Ordinal) < 0)
                {
                    continue;
                }

                XElement node;
                try
                {
                    node = XElement.Parse(m.Value);
                }
                catch (System.Xml.XmlException)
                {
                    // HTML anchors are frequently not well-formed XML; skip those instead of failing the whole scan.
                    continue;
                }

                // An anchor without an href attribute cannot produce a usable summary.
                XAttribute hrefAttribute = node.Attribute("href");
                if (hrefAttribute == null)
                {
                    continue;
                }

                string href = hrefAttribute.Value;
                string companyName = node.Value;

                if (!string.IsNullOrEmpty(href) && !string.IsNullOrEmpty(companyName))
                {
                    companySummaries.Add(new CompanySummary() { Name = companyName, SourceUrl = href, City = city });
                }
            }

            return companySummaries;
        }
    }
}
